yuqi1129 commented on code in PR #9416: URL: https://github.com/apache/gravitino/pull/9416#discussion_r2602513584
########## catalogs/hive-metastore-common/src/main/java/org/apache/gravitino/hive/client/Util.java: ########## @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.hive.client; + +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.util.Properties; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; + +public class Util { + + public static final String HIVE_CONFIG_RESOURCES = "hive.config.resources"; + + public static Method findMethod(Class<?> klass, String name, Class<?>... args) + throws NoSuchMethodException { + return klass.getMethod(name, args); Review Comment: You may use `MethodUtils.getAccessibleMethod` to simplify the code. ########## catalogs/hive-metastore-common/src/main/java/org/apache/gravitino/hive/client/ProxyHiveClientImpl.java: ########## @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.hive.client; + +import java.lang.reflect.InvocationHandler; +import java.lang.reflect.Method; + +public class ProxyHiveClientImpl implements InvocationHandler { + @Override + public Object invoke(Object o, Method method, Object[] objects) throws Throwable { + return null; Review Comment: Will you try to implement this method in the next PR? ########## catalogs/hive-metastore-common/src/main/java/org/apache/gravitino/hive/client/HiveClientFactory.java: ########## @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.hive.client; + +import static org.apache.gravitino.catalog.hive.HiveConstants.HIVE_METASTORE_URIS; +import static org.apache.gravitino.hive.client.HiveClientClassLoader.HiveVersion.HIVE2; +import static org.apache.gravitino.hive.client.HiveClientClassLoader.HiveVersion.HIVE3; +import static org.apache.gravitino.hive.client.Util.buildConfigurationFromProperties; + +import com.google.common.base.Preconditions; +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; +import java.util.Properties; +import org.apache.gravitino.exceptions.GravitinoRuntimeException; +import org.apache.gravitino.utils.PrincipalUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class HiveClientFactory { + private static final Logger LOG = LoggerFactory.getLogger(HiveClientFactory.class); + + // Remember which Hive backend version worked successfully for this factory. + private volatile HiveClientClassLoader.HiveVersion backendVersion; + private volatile HiveClientClassLoader backendClassLoader; + + @SuppressWarnings("UnusedVariable") + private final Configuration hadoopConf; + + private final Properties properties; + + /** + * Creates a {@link HiveClientFactory} bound to the given configuration properties. + * + * @param properties Hive client configuration, must not be null. + * @param id An identifier for this factory instance. 
+ */ + public HiveClientFactory(Properties properties, String id) { + Preconditions.checkArgument(properties != null, "Properties cannot be null"); + this.properties = properties; + + try { + this.hadoopConf = buildConfigurationFromProperties(properties); + } catch (Exception e) { + throw new RuntimeException("Failed to initialize HiveClientFactory", e); + } + } + + public HiveClient createHiveClient() { + HiveClient client = null; + try { + if (backendVersion != null) { + HiveClientClassLoader classLoader = getOrCreateClassLoader(backendVersion); Review Comment: When will we close the `HiveClientClassLoader`? Can we share the class loader with catalogs with the same Hive version? ########## catalogs/hive-metastore3-libs/build.gradle.kts: ########## @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import org.gradle.api.publish.maven.tasks.PublishToMavenLocal +import org.gradle.api.publish.maven.tasks.PublishToMavenRepository + +plugins { + id("java") + id("idea") +} + +dependencies { + implementation(libs.hive3.metastore) { Review Comment: Why do we need `{}` in the end? 
########## catalogs/hive-metastore-common/src/main/java/org/apache/gravitino/hive/client/HiveClientFactory.java: ########## @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.hive.client; + +import static org.apache.gravitino.catalog.hive.HiveConstants.HIVE_METASTORE_URIS; +import static org.apache.gravitino.hive.client.HiveClientClassLoader.HiveVersion.HIVE2; +import static org.apache.gravitino.hive.client.HiveClientClassLoader.HiveVersion.HIVE3; +import static org.apache.gravitino.hive.client.Util.buildConfigurationFromProperties; + +import com.google.common.base.Preconditions; +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; +import java.util.Properties; +import org.apache.gravitino.exceptions.GravitinoRuntimeException; +import org.apache.gravitino.utils.PrincipalUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class HiveClientFactory { + private static final Logger LOG = LoggerFactory.getLogger(HiveClientFactory.class); + + // Remember which Hive backend version worked successfully for this factory. 
+ private volatile HiveClientClassLoader.HiveVersion backendVersion; Review Comment: Do we plan to share the `HiveClientFactory` for Hive2 and Hive3? Why do we need to make it `volatile`? Will there be only one `HiveClientFactory` instance globally? ########## catalogs/hive-metastore-common/src/main/java/org/apache/gravitino/hive/client/HiveShim.java: ########## @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.hive.client; + +import java.util.List; +import java.util.Properties; +import org.apache.gravitino.hive.HivePartition; +import org.apache.gravitino.hive.HiveSchema; +import org.apache.gravitino.hive.HiveTable; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.thrift.TException; + +/** + * Java translation of Scala's `Shim` sealed abstract class. + * + * <p>This class declares the compatibility layer between Spark and different Hive versions. + * Concrete subclasses (e.g. HiveShimV2, HiveShimV3 ...) must implement these methods according to + * the behavior of the corresponding Hive release. 
+ */ +public abstract class HiveShim { + + protected static final String RETRYING_META_STORE_CLIENT_CLASS = + "org.apache.hadoop.hive.metastore.RetryingMetaStoreClient"; + protected static final String IMETA_STORE_CLIENT_CLASS = + "org.apache.hadoop.hive.metastore.IMetaStoreClient"; + protected static final String HIVE_CONF_CLASS = "org.apache.hadoop.hive.conf.HiveConf"; + protected static final String CONFIGURATION_CLASS = "org.apache.hadoop.conf.Configuration"; + protected static final String METHOD_GET_PROXY = "getProxy"; + + protected final IMetaStoreClient client; + protected final HiveClientClassLoader.HiveVersion version; + + protected HiveShim(HiveClientClassLoader.HiveVersion version, Properties properties) { + this.client = createMetaStoreClient(properties); + this.version = version; + } + + public abstract IMetaStoreClient createMetaStoreClient(Properties properties); + + public List<String> getAllDatabases(String catalogName) { + try { + return client.getAllDatabases(); + } catch (TException e) { + throw HiveExceptionConverter.toGravitinoException( + e, HiveExceptionConverter.ExceptionTarget.catalog(catalogName)); + } + } + + public abstract void createDatabase(HiveSchema database); + + public abstract HiveSchema getDatabase(String catalogName, String databaseName); + + public abstract void alterDatabase(String catalogName, String databaseName, HiveSchema database); + + public abstract void dropDatabase(String catalogName, String databaseName, boolean cascade); + + public abstract List<String> getAllTables(String catalogName, String databaseName); + + public abstract List<String> listTableNamesByFilter( + String catalogName, String databaseName, String filter, short pageSize); + + public abstract HiveTable getTable(String catalogName, String databaseName, String tableName); + + public abstract void alterTable( + String catalogName, String databaseName, String tableName, HiveTable alteredHiveTable); + + public abstract void dropTable( + String 
catalogName, + String databaseName, + String tableName, + boolean deleteData, + boolean ifPurge); + + public abstract void createTable(HiveTable hiveTable); + Review Comment: Does this class cover all Hive methods used in Gravitino? ########## catalogs/hive-metastore-common/src/main/java/org/apache/gravitino/hive/client/HiveClientFactory.java: ########## @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.hive.client; + +import static org.apache.gravitino.catalog.hive.HiveConstants.HIVE_METASTORE_URIS; +import static org.apache.gravitino.hive.client.HiveClientClassLoader.HiveVersion.HIVE2; +import static org.apache.gravitino.hive.client.HiveClientClassLoader.HiveVersion.HIVE3; +import static org.apache.gravitino.hive.client.Util.buildConfigurationFromProperties; + +import com.google.common.base.Preconditions; +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; +import java.util.Properties; +import org.apache.gravitino.exceptions.GravitinoRuntimeException; +import org.apache.gravitino.utils.PrincipalUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class HiveClientFactory { + private static final Logger LOG = LoggerFactory.getLogger(HiveClientFactory.class); + + // Remember which Hive backend version worked successfully for this factory. + private volatile HiveClientClassLoader.HiveVersion backendVersion; + private volatile HiveClientClassLoader backendClassLoader; + + @SuppressWarnings("UnusedVariable") + private final Configuration hadoopConf; + + private final Properties properties; + + /** + * Creates a {@link HiveClientFactory} bound to the given configuration properties. + * + * @param properties Hive client configuration, must not be null. + * @param id An identifier for this factory instance. 
+ */ + public HiveClientFactory(Properties properties, String id) { + Preconditions.checkArgument(properties != null, "Properties cannot be null"); + this.properties = properties; + + try { + this.hadoopConf = buildConfigurationFromProperties(properties); + } catch (Exception e) { + throw new RuntimeException("Failed to initialize HiveClientFactory", e); + } + } + + public HiveClient createHiveClient() { + HiveClient client = null; + try { + if (backendVersion != null) { + HiveClientClassLoader classLoader = getOrCreateClassLoader(backendVersion); + client = createHiveClientInternal(classLoader); + LOG.info("Connected to Hive Metastore using cached Hive version {}", backendVersion.name()); + return client; + } + } catch (Exception e) { + LOG.warn( + "Failed to connect to Hive Metastore using cached Hive version {}", backendVersion, e); + throw new RuntimeException("Failed to connect to Hive Metastore", e); + } + + try { + // Try using Hive3 first + HiveClientClassLoader classloader = getOrCreateClassLoader(HIVE3); + client = createHiveClientInternal(classloader); + client.getCatalogs(); + LOG.info("Connected to Hive Metastore using Hive version HIVE3"); + backendClassLoader = classloader; + backendVersion = HiveClientClassLoader.HiveVersion.HIVE3; + return client; + + } catch (GravitinoRuntimeException e) { + if (client != null) { + client.close(); + } + + try { + // Fallback to Hive2 if we can list databases + if (e.getMessage().contains("Invalid method name: 'get_catalogs'") + || e.getMessage().contains("class not found") // caused by MiniHiveMetastoreService + ) { + HiveClientClassLoader classloader = getOrCreateClassLoader(HIVE2); + client = createHiveClientInternal(classloader); + LOG.info("Connected to Hive Metastore using Hive version HIVE2"); + backendClassLoader = classloader; + backendVersion = HIVE2; + return client; + } + throw e; + + } catch (Exception ex) { + LOG.error("Failed to connect to Hive Metastore using both Hive3 and Hive2", ex); + throw e; + 
} + } catch (Exception e) { + throw HiveExceptionConverter.toGravitinoException( + e, HiveExceptionConverter.ExceptionTarget.other("")); + } + } + + private HiveClientClassLoader getOrCreateClassLoader(HiveClientClassLoader.HiveVersion version) + throws Exception { + if (backendVersion != version) { + backendClassLoader = + HiveClientClassLoader.createLoader( + version, Thread.currentThread().getContextClassLoader()); + backendVersion = version; + } + return backendClassLoader; + } + + public static HiveClient createHiveClientImpl( + HiveClientClassLoader.HiveVersion version, Properties properties, ClassLoader classloader) + throws Exception { + Class<?> hiveClientImplClass = classloader.loadClass(HiveClientImpl.class.getName()); + Constructor<?> hiveClientImplCtor = + hiveClientImplClass.getConstructor( + HiveClientClassLoader.HiveVersion.class, Properties.class); + return (HiveClient) hiveClientImplCtor.newInstance(version, properties); + } + + public static HiveClient createProxyHiveClientImpl( + HiveClientClassLoader.HiveVersion version, + Properties properties, + UserGroupInformation ugi, + ClassLoader classloader) + throws Exception { + Class<?> hiveClientImplClass = classloader.loadClass(ProxyHiveClientImpl.class.getName()); + Method createMethod = + Util.findStaticMethod( + hiveClientImplClass, + "createClient", + HiveClientClassLoader.HiveVersion.class, + UserGroupInformation.class, + Properties.class); + return (HiveClient) createMethod.invoke(null, version, ugi, properties); + } + + private HiveClient createHiveClientInternal(HiveClientClassLoader classloader) { + ClassLoader origLoader = Thread.currentThread().getContextClassLoader(); + Thread.currentThread().setContextClassLoader(classloader); + try { + UserGroupInformation ugi; + ugi = UserGroupInformation.getCurrentUser(); + if (!ugi.getUserName().equals(PrincipalUtils.getCurrentUserName())) { Review Comment: Why do we need to create a proxy user here? 
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
