nsivabalan commented on a change in pull request #3289: URL: https://github.com/apache/hudi/pull/3289#discussion_r744182814
########## File path: hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveShimLoader.java ########## @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.hive; + +import org.apache.hive.common.util.HiveVersionInfo; +import org.apache.hudi.hive.client.HiveShim; +import org.apache.hudi.hive.client.HiveShimV220; +import org.apache.hudi.hive.client.HiveShimV210; +import org.apache.hudi.hive.client.HiveShimV201; +import org.apache.hudi.hive.client.HiveShimV122; +import org.apache.hudi.hive.client.HiveShimV121; +import org.apache.hudi.hive.client.HiveShimV111; +import org.apache.hudi.hive.client.HiveShimV234; +import org.apache.hudi.hive.client.HiveShimV230; +import org.apache.hudi.hive.client.HiveShimV233; +import org.apache.hudi.hive.client.HiveShimV100; +import org.apache.hudi.hive.client.HiveShimV101; +import org.apache.hudi.hive.client.HiveShimV110; +import org.apache.hudi.hive.client.HiveShimV120; +import org.apache.hudi.hive.client.HiveShimV200; +import org.apache.hudi.hive.client.HiveShimV211; +import org.apache.hudi.hive.client.HiveShimV231; +import org.apache.hudi.hive.client.HiveShimV232; +import org.apache.hudi.hive.client.HiveShimV235; +import 
org.apache.hudi.hive.client.HiveShimV236; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * A loader to load HiveShim. + * + * This code is mainly copy from Apache/Flink. + */ +public class HiveShimLoader { + + private static final Logger LOG = LoggerFactory.getLogger(HiveShimLoader.class); + + public static final String HIVE_VERSION_V1_0_0 = "1.0.0"; + public static final String HIVE_VERSION_V1_0_1 = "1.0.1"; + public static final String HIVE_VERSION_V1_1_0 = "1.1.0"; + public static final String HIVE_VERSION_V1_1_1 = "1.1.1"; + public static final String HIVE_VERSION_V1_2_0 = "1.2.0"; + public static final String HIVE_VERSION_V1_2_1 = "1.2.1"; + public static final String HIVE_VERSION_V1_2_2 = "1.2.2"; + public static final String HIVE_VERSION_V2_0_0 = "2.0.0"; + public static final String HIVE_VERSION_V2_0_1 = "2.0.1"; + public static final String HIVE_VERSION_V2_1_0 = "2.1.0"; + public static final String HIVE_VERSION_V2_1_1 = "2.1.1"; + public static final String HIVE_VERSION_V2_2_0 = "2.2.0"; + public static final String HIVE_VERSION_V2_3_0 = "2.3.0"; + public static final String HIVE_VERSION_V2_3_1 = "2.3.1"; + public static final String HIVE_VERSION_V2_3_2 = "2.3.2"; + public static final String HIVE_VERSION_V2_3_3 = "2.3.3"; + public static final String HIVE_VERSION_V2_3_4 = "2.3.4"; + public static final String HIVE_VERSION_V2_3_5 = "2.3.5"; + public static final String HIVE_VERSION_V2_3_6 = "2.3.6"; + + private static final Map<String, HiveShim> HIVE_SHIMS = new ConcurrentHashMap<>(2); + + private HiveShimLoader() { + } + + public static HiveShim loadHiveShim(String version) { + return HIVE_SHIMS.computeIfAbsent(version, (v) -> { + if (v.startsWith(HIVE_VERSION_V1_0_0)) { + return new HiveShimV100(); + } + if (v.startsWith(HIVE_VERSION_V1_0_1)) { + return new HiveShimV101(); + } + if (v.startsWith(HIVE_VERSION_V1_1_0)) { + return new HiveShimV110(); + } + if 
(v.startsWith(HIVE_VERSION_V1_1_1)) { + return new HiveShimV111(); + } + if (v.startsWith(HIVE_VERSION_V1_2_0)) { + return new HiveShimV120(); + } + if (v.startsWith(HIVE_VERSION_V1_2_1)) { + return new HiveShimV121(); + } + if (v.startsWith(HIVE_VERSION_V1_2_2)) { + return new HiveShimV122(); + } + if (v.startsWith(HIVE_VERSION_V2_0_0)) { + return new HiveShimV200(); + } + if (v.startsWith(HIVE_VERSION_V2_0_1)) { + return new HiveShimV201(); + } + if (v.startsWith(HIVE_VERSION_V2_1_0)) { + return new HiveShimV210(); + } + if (v.startsWith(HIVE_VERSION_V2_1_1)) { + return new HiveShimV211(); + } + if (v.startsWith(HIVE_VERSION_V2_2_0)) { + return new HiveShimV220(); + } + if (v.startsWith(HIVE_VERSION_V2_3_0)) { + return new HiveShimV230(); + } + if (v.startsWith(HIVE_VERSION_V2_3_1)) { + return new HiveShimV231(); + } + if (v.startsWith(HIVE_VERSION_V2_3_2)) { + return new HiveShimV232(); + } + if (v.startsWith(HIVE_VERSION_V2_3_3)) { + return new HiveShimV233(); + } + if (v.startsWith(HIVE_VERSION_V2_3_4)) { + return new HiveShimV234(); + } + if (v.startsWith(HIVE_VERSION_V2_3_5)) { + return new HiveShimV235(); + } + if (v.startsWith(HIVE_VERSION_V2_3_6)) { + return new HiveShimV236(); + } + throw new HoodieHiveSyncException("Unsupported Hive version " + v); Review comment: can you help me understand what do we do today if someone tries with higher hive versions? do we explicitly fail the call somewhere? Or are we changing the behavior with this patch ? ########## File path: hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveMetastoreClientWrapper.java ########## @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.hive; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Table; + +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.hive.client.HiveShim; + +import org.apache.thrift.TException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +import static org.apache.hudi.common.util.ValidationUtils.checkArgument; + +/** + * Wrapper class for Hive Metastore Client, which embeds a HiveShim layer to handle different Hive versions. + * Methods provided mostly conforms to IMetaStoreClient interfaces except those that require shims. + * + * This code is mainly copy from Apache/Flink. 
+ */ +public class HiveMetastoreClientWrapper implements AutoCloseable { + + private static final Logger LOG = LoggerFactory.getLogger(HiveMetastoreClientWrapper.class); + + private final IMetaStoreClient client; + + private final HiveConf hiveConf; + + private final HiveShim hiveShim; + + public HiveMetastoreClientWrapper(HiveConf hiveConf, String hiveVersion) { + checkArgument(hiveVersion != null, "hive version cannot be null"); + this.hiveConf = hiveConf; + hiveShim = HiveShimLoader.loadHiveShim(StringUtils.isNullOrEmpty(hiveVersion) ? HiveShimLoader.getHiveVersion() : hiveVersion); + client = createMetastoreClient(); + } + + @Override + public void close() { + client.close(); + } + + public List<String> getAllTables(String databaseName) throws TException { + return client.getAllTables(databaseName); + } + + public List<Partition> listPartitions(String dbName, String tblName, short max) throws TException { + return client.listPartitions(dbName, tblName, max); + } + + public boolean tableExists(String databaseName, String tableName) throws TException { + return client.tableExists(databaseName, tableName); + } + + public Database getDatabase(String name) throws TException { + return client.getDatabase(name); + } + + public void createDatabase(Database database) throws TException { + client.createDatabase(database); + } + + public Table getTable(String databaseName, String tableName) throws TException { + return client.getTable(databaseName, tableName); + } + + public void createTable(Table table) throws TException { Review comment: guess in this patch, we are focussing only on DDL and DML will be added as a follow up? ########## File path: hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/client/HiveShim.java ########## @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.hive.client; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.InvalidOperationException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Table; + +import org.apache.thrift.TException; + +/** + * A shim layer to support different versions of Hive. + * + * This code and it's concrete implement is mainly copy from Apache/Flink. + */ +public interface HiveShim { + + /** + * Create a Hive Metastore client based on the given HiveConf object. + * + * @param hiveConf HiveConf instance + * @return an IMetaStoreClient instance + */ + IMetaStoreClient getHiveMetastoreClient(HiveConf hiveConf); + + /** + * Alters a Hive table. + * + * @param client the Hive metastore client + * @param databaseName the name of the database to which the table belongs + * @param tableName the name of the table to be altered + * @param table the new Hive table + */ + void alterTable(IMetaStoreClient client, String databaseName, String tableName, Table table) Review comment: Can we give a default impl here and override only where its required ? 
``` client.alter_table(databaseName, tableName, table); ``` ########## File path: hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveMetastoreClientWrapper.java ########## @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.hive; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Table; + +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.hive.client.HiveShim; + +import org.apache.thrift.TException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +import static org.apache.hudi.common.util.ValidationUtils.checkArgument; + +/** + * Wrapper class for Hive Metastore Client, which embeds a HiveShim layer to handle different Hive versions. + * Methods provided mostly conforms to IMetaStoreClient interfaces except those that require shims. + * + * This code is mainly copy from Apache/Flink. 
+ */ +public class HiveMetastoreClientWrapper implements AutoCloseable { + + private static final Logger LOG = LoggerFactory.getLogger(HiveMetastoreClientWrapper.class); + + private final IMetaStoreClient client; + + private final HiveConf hiveConf; + + private final HiveShim hiveShim; + + public HiveMetastoreClientWrapper(HiveConf hiveConf, String hiveVersion) { + checkArgument(hiveVersion != null, "hive version cannot be null"); + this.hiveConf = hiveConf; + hiveShim = HiveShimLoader.loadHiveShim(StringUtils.isNullOrEmpty(hiveVersion) ? HiveShimLoader.getHiveVersion() : hiveVersion); + client = createMetastoreClient(); + } + + @Override + public void close() { + client.close(); + } + + public List<String> getAllTables(String databaseName) throws TException { Review comment: Are these methods intentionally placed here rather than in HiveShim? Hiveshim is the one that creates the IMetaStoreClient anyways. so was curious why not expose all methods in HiveShim only. Do we want to keep HiveSim cleaner to hold methods only which we are looking to bypass the direct client call ? ########## File path: hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveMetastoreClientWrapper.java ########## @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.hive; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Table; + +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.hive.client.HiveShim; + +import org.apache.thrift.TException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +import static org.apache.hudi.common.util.ValidationUtils.checkArgument; + +/** + * Wrapper class for Hive Metastore Client, which embeds a HiveShim layer to handle different Hive versions. + * Methods provided mostly conforms to IMetaStoreClient interfaces except those that require shims. + * + * This code is mainly copy from Apache/Flink. + */ +public class HiveMetastoreClientWrapper implements AutoCloseable { + + private static final Logger LOG = LoggerFactory.getLogger(HiveMetastoreClientWrapper.class); + + private final IMetaStoreClient client; + + private final HiveConf hiveConf; + + private final HiveShim hiveShim; + + public HiveMetastoreClientWrapper(HiveConf hiveConf, String hiveVersion) { + checkArgument(hiveVersion != null, "hive version cannot be null"); + this.hiveConf = hiveConf; + hiveShim = HiveShimLoader.loadHiveShim(StringUtils.isNullOrEmpty(hiveVersion) ? HiveShimLoader.getHiveVersion() : hiveVersion); + client = createMetastoreClient(); + } + + @Override + public void close() { Review comment: can you help me understand where do we call this? I see in HiveQueryDDLExecutor, we do the following. 
``` if (client != null) { Hive.closeCurrent(); } ``` ########## File path: hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java ########## @@ -938,8 +939,8 @@ public void testReadSchemaForMOR(String syncMode) throws Exception { @Test public void testConnectExceptionIgnoreConfigSet() throws IOException, URISyntaxException, HiveException, MetaException { - hiveSyncConfig.useJdbc = true; - HiveTestUtil.hiveSyncConfig.useJdbc = true; + hiveSyncConfig.useJdbc = false; Review comment: may I know why this change? ########## File path: hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveMetastoreClientWrapper.java ########## @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.hive; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Table; + +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.hive.client.HiveShim; + +import org.apache.thrift.TException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +import static org.apache.hudi.common.util.ValidationUtils.checkArgument; + +/** + * Wrapper class for Hive Metastore Client, which embeds a HiveShim layer to handle different Hive versions. + * Methods provided mostly conforms to IMetaStoreClient interfaces except those that require shims. + * + * This code is mainly copy from Apache/Flink. + */ +public class HiveMetastoreClientWrapper implements AutoCloseable { Review comment: Few other names I can think of. not too strong on any of them. wanted to hear your thoughts. HoodieHiveMetastoreClient HiveMetastoreClientDelegator. ########## File path: hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java ########## @@ -120,6 +120,9 @@ @Parameter(names = {"--spark-schema-length-threshold"}, description = "The maximum length allowed in a single cell when storing additional schema information in Hive's metastore.") public int sparkSchemaLengthThreshold = 4000; + @Parameter(names = {"--hive-version"}, description = "hive version that hudi use, not suggests user to specific", required = false) Review comment: can you help me understand why would someone wants to override to use a diff hive version than whats actually being used? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: [email protected]. For queries about this service, please contact Infrastructure at: [email protected]
