This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 9dc146de7752bb4f684203d34233deeb215f2bc6 Author: Saurabh Katiyal <[email protected]> AuthorDate: Wed Nov 8 03:06:38 2023 +0530 IMPALA-9086: Show Hive configurations in /hadoop-varz page It's a two part improvement: 1. Updated /hadoop-varz static class implementation for coordinator webUI from: org.apache.hadoop.conf.Configuration to : org.apache.hadoop.hive.conf.HiveConf (Inherits Configuration) 2. Added similar implementation of /hadoop-varz for catalog webUI Now /hadoop-varz page has lot more information about HMS client and configuration including warehouse.dir Change-Id: I5af0eb68e71afeed64660d4d40584208ea503217 Reviewed-on: http://gerrit.cloudera.org:8080/20682 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/catalog/catalog-server.cc | 29 ++++++++++++++++++++++ be/src/catalog/catalog-server.h | 5 ++++ be/src/catalog/catalog.cc | 5 ++++ be/src/catalog/catalog.h | 3 +++ be/src/service/impala-http-handler.cc | 10 ++++++-- .../java/org/apache/impala/service/JniCatalog.java | 24 ++++++++++++++++++ .../org/apache/impala/service/JniFrontend.java | 12 ++++++--- tests/custom_cluster/test_web_pages.py | 1 + tests/webserver/test_web_pages.py | 13 ++++++++++ 9 files changed, 96 insertions(+), 6 deletions(-) diff --git a/be/src/catalog/catalog-server.cc b/be/src/catalog/catalog-server.cc index 01617555e..3402959b0 100644 --- a/be/src/catalog/catalog-server.cc +++ b/be/src/catalog/catalog-server.cc @@ -213,6 +213,8 @@ const string TABLE_METRICS_TEMPLATE = "table_metrics.tmpl"; const string EVENT_WEB_PAGE = "/events"; const string EVENT_METRICS_TEMPLATE = "events.tmpl"; const string CATALOG_SERVICE_HEALTH_WEB_PAGE = "/healthz"; +const string HADOOP_VARZ_TEMPLATE = "hadoop-varz.tmpl"; +const string HADOOP_VARZ_WEB_PAGE = "/hadoop-varz"; const int REFRESH_METRICS_INTERVAL_MS = 1000; @@ -532,6 +534,8 @@ void CatalogServer::RegisterWebpages(Webserver* webserver, bool metrics_only) { 
webserver->RegisterUrlCallback(CATALOG_OPERATIONS_WEB_PAGE, CATALOG_OPERATIONS_TEMPLATE, [this](const auto& args, auto* doc) { this->OperationUsageUrlCallback(args, doc); }, true); + webserver->RegisterUrlCallback(HADOOP_VARZ_WEB_PAGE, HADOOP_VARZ_TEMPLATE, + [this](const auto& args, auto* doc) { this->HadoopVarzHandler(args, doc); }, true); RegisterLogLevelCallbacks(webserver, true); } @@ -1171,4 +1175,29 @@ void CatalogServer::HealthzHandler( *response = HttpStatusCode::ServiceUnavailable; } +void CatalogServer::HadoopVarzHandler(const Webserver::WebRequest& req, + Document* document) { + TGetAllHadoopConfigsResponse response; + Status status = catalog_->GetAllHadoopConfigs(&response); + if (!status.ok()) { + LOG(ERROR) << "Error getting cluster configuration for hadoop-varz: " + << status.GetDetail(); + Value error(status.GetDetail().c_str(), document->GetAllocator()); + document->AddMember("error", error, document->GetAllocator()); + return; + } + + Value configs(kArrayType); + typedef map<string, string> ConfigMap; + for (const auto& config: response.configs) { + Value key(config.first.c_str(), document->GetAllocator()); + Value value(config.second.c_str(), document->GetAllocator()); + Value config_json(kObjectType); + config_json.AddMember("key", key, document->GetAllocator()); + config_json.AddMember("value", value, document->GetAllocator()); + configs.PushBack(config_json, document->GetAllocator()); + } + document->AddMember("configs", configs, document->GetAllocator()); +} + } diff --git a/be/src/catalog/catalog-server.h b/be/src/catalog/catalog-server.h index e8c4513db..5b3f527da 100644 --- a/be/src/catalog/catalog-server.h +++ b/be/src/catalog/catalog-server.h @@ -356,6 +356,11 @@ class CatalogServer { /// Raw callback to indicate whether the service is ready. void HealthzHandler(const Webserver::WebRequest& req, std::stringstream* data, HttpStatusCode* response); + + /// Json callback for /hadoop-varz. 
Produces Json with a list, 'configs', of (key, + /// value) pairs, one for each Hadoop configuration value. + void HadoopVarzHandler(const Webserver::WebRequest& req, + rapidjson::Document* document); }; } diff --git a/be/src/catalog/catalog.cc b/be/src/catalog/catalog.cc index 92b306b51..310424270 100644 --- a/be/src/catalog/catalog.cc +++ b/be/src/catalog/catalog.cc @@ -73,6 +73,7 @@ Catalog::Catalog() { {"refreshDataSources", "()V", &refresh_data_sources_}, {"getNullPartitionName", "()[B", &get_null_partition_name_id_}, {"getLatestCompactions", "([B)[B", &get_latest_compactions_id_}, + {"getAllHadoopConfigs", "()[B", &get_hadoop_configs_id_}, }; JNIEnv* jni_env = JniUtil::GetJNIEnv(); @@ -213,6 +214,10 @@ Status Catalog::UpdateTableUsage(const TUpdateTableUsageRequest& req) { return JniUtil::CallJniMethod(catalog_, update_table_usage_id_, req); } +Status Catalog::GetAllHadoopConfigs(TGetAllHadoopConfigsResponse* result) { + return JniUtil::CallJniMethod(catalog_, get_hadoop_configs_id_, result); +} + void Catalog::RegenerateServiceId() { JNIEnv* jni_env = JniUtil::GetJNIEnv(); jni_env->CallVoidMethod(catalog_, regenerate_service_id_); diff --git a/be/src/catalog/catalog.h b/be/src/catalog/catalog.h index f102a2908..489a84b45 100644 --- a/be/src/catalog/catalog.h +++ b/be/src/catalog/catalog.h @@ -152,6 +152,8 @@ class Catalog { /// Returns OK if the refreshing was successful, otherwise a Status object with /// information on the error will be returned. Status RefreshDataSources(); + /// Returns all Hadoop configurations in key, value form in result. 
+ Status GetAllHadoopConfigs(TGetAllHadoopConfigsResponse* result); private: jobject catalog_; // instance of org.apache.impala.service.JniCatalog @@ -179,6 +181,7 @@ class Catalog { jmethodID refresh_data_sources_; // JniCatalog.refreshDataSources() jmethodID get_null_partition_name_id_; // JniCatalog.getNullPartitionName() jmethodID get_latest_compactions_id_; // JniCatalog.getLatestCompactions() + jmethodID get_hadoop_configs_id_; // JniCatalog.getAllHadoopConfigs() }; } diff --git a/be/src/service/impala-http-handler.cc b/be/src/service/impala-http-handler.cc index 79de37006..04579866d 100644 --- a/be/src/service/impala-http-handler.cc +++ b/be/src/service/impala-http-handler.cc @@ -230,11 +230,17 @@ void ImpalaHttpHandler::HadoopVarzHandler(const Webserver::WebRequest& req, Document* document) { TGetAllHadoopConfigsResponse response; Status status = server_->exec_env_->frontend()->GetAllHadoopConfigs(&response); - if (!status.ok()) return; + if (!status.ok()) { + LOG(ERROR) << "Error getting cluster configuration for hadoop-varz: " + << status.GetDetail(); + Value error(status.GetDetail().c_str(), document->GetAllocator()); + document->AddMember("error", error, document->GetAllocator()); + return; + } Value configs(kArrayType); typedef map<string, string> ConfigMap; - for (const ConfigMap::value_type& config: response.configs) { + for (const auto& config: response.configs) { Value key(config.first.c_str(), document->GetAllocator()); Value value(config.second.c_str(), document->GetAllocator()); Value config_json(kObjectType); diff --git a/fe/src/main/java/org/apache/impala/service/JniCatalog.java b/fe/src/main/java/org/apache/impala/service/JniCatalog.java index c25faabaa..21fc2a63a 100644 --- a/fe/src/main/java/org/apache/impala/service/JniCatalog.java +++ b/fe/src/main/java/org/apache/impala/service/JniCatalog.java @@ -21,12 +21,15 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import 
com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import java.util.Map; import java.util.ArrayList; import java.util.List; import java.util.UUID; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.stream.Collectors; import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.impala.analysis.TableName; import org.apache.impala.authorization.AuthorizationConfig; import org.apache.impala.authorization.AuthorizationFactory; @@ -85,6 +88,7 @@ import org.apache.impala.thrift.TTableUsage; import org.apache.impala.thrift.TUniqueId; import org.apache.impala.thrift.TUpdateCatalogRequest; import org.apache.impala.thrift.TUpdateTableUsageRequest; +import org.apache.impala.thrift.TGetAllHadoopConfigsResponse; import org.apache.impala.util.AuthorizationUtil; import org.apache.impala.util.CatalogOpUtil; import org.apache.impala.util.GlogAppender; @@ -126,6 +130,8 @@ public class JniCatalog { return new TUniqueId(uuid.getMostSignificantBits(), uuid.getLeastSignificantBits()); } + private static final HiveConf HIVE_CONF = new HiveConf(); + public JniCatalog(byte[] thriftBackendConfig) throws ImpalaException { TBackendGflags cfg = new TBackendGflags(); JniUtil.deserializeThrift(protocolFactory_, cfg, thriftBackendConfig); @@ -594,4 +600,22 @@ public class JniCatalog { return response; }); } + + /** + * Returns the serialized byte array of TGetAllHadoopConfigsResponse + */ + public byte[] getAllHadoopConfigs() throws ImpalaException { + Map<String, String> configs = Maps.newHashMap(); + for (Map.Entry<String, String> e: HIVE_CONF) { + configs.put(e.getKey(), e.getValue()); + } + TGetAllHadoopConfigsResponse result = new TGetAllHadoopConfigsResponse(); + result.setConfigs(configs); + try { + TSerializer serializer = new TSerializer(protocolFactory_); + return serializer.serialize(result); + } catch (TException e) { 
+ throw new InternalException(e.getMessage()); + } + } } diff --git a/fe/src/main/java/org/apache/impala/service/JniFrontend.java b/fe/src/main/java/org/apache/impala/service/JniFrontend.java index 4c6d05f7c..77221e52b 100644 --- a/fe/src/main/java/org/apache/impala/service/JniFrontend.java +++ b/fe/src/main/java/org/apache/impala/service/JniFrontend.java @@ -22,6 +22,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileSystem; @@ -617,17 +618,20 @@ public class JniFrontend { return frontend_.getCatalog().getTable(tableName.db_name, tableName.table_name); } - // Caching this saves ~50ms per call to getHadoopConfigAsHtml + // Caching this saves ~50ms per call to getHadoopConfig private static final Configuration CONF = new Configuration(); private static final Groups GROUPS = Groups.getUserToGroupsMappingService(CONF); + // Caching this saves ~50ms per call to getAllHadoopConfigs + // org.apache.hadoop.hive.conf.HiveConf inherits org.apache.hadoop.conf.Configuration + private static final HiveConf HIVE_CONF = new HiveConf(); + /** - * Returns a string of all loaded Hadoop configuration parameters as a table of keys - * and values. If asText is true, output in raw text. Otherwise, output in html. 
+ * Returns the serialized byte array of TGetAllHadoopConfigsResponse */ public byte[] getAllHadoopConfigs() throws ImpalaException { Map<String, String> configs = Maps.newHashMap(); - for (Map.Entry<String, String> e: CONF) { + for (Map.Entry<String, String> e: HIVE_CONF) { configs.put(e.getKey(), e.getValue()); } TGetAllHadoopConfigsResponse result = new TGetAllHadoopConfigsResponse(); diff --git a/tests/custom_cluster/test_web_pages.py b/tests/custom_cluster/test_web_pages.py index 2bd40764a..a8e810d97 100644 --- a/tests/custom_cluster/test_web_pages.py +++ b/tests/custom_cluster/test_web_pages.py @@ -227,6 +227,7 @@ class TestWebPage(CustomClusterTestSuite): expected_catalog_links = [ "/", "/catalog", + "/hadoop-varz", "/events", "/jmx", "/log_level", diff --git a/tests/webserver/test_web_pages.py b/tests/webserver/test_web_pages.py index 35f69519e..9757b0d72 100644 --- a/tests/webserver/test_web_pages.py +++ b/tests/webserver/test_web_pages.py @@ -59,6 +59,7 @@ class TestWebPage(ImpalaTestSuite): QUERIES_URL = "http://localhost:{0}/queries" HEALTHZ_URL = "http://localhost:{0}/healthz" EVENT_PROCESSOR_URL = "http://localhost:{0}/events" + HADOOP_VARZ_URL = "http://localhost:{0}/hadoop-varz" # log4j changes do not apply to the statestore since it doesn't # have an embedded JVM. 
So we make two sets of ports to test the @@ -1110,6 +1111,18 @@ class TestWebPage(ImpalaTestSuite): if q['query_id'] == query_id] assert len(expected_queries) == 1 + @pytest.mark.execute_serially + def test_hadoop_varz_page(self): + """test for /hadoop-varz to check availability of hadoop configuration like + hive warehouse dir, fs.defaultFS""" + responses = self.get_and_check_status(self.HADOOP_VARZ_URL, + "hive.metastore.warehouse.dir", ports_to_test=self.TEST_PORTS_WITHOUT_SS) + responses = self.get_and_check_status(self.HADOOP_VARZ_URL, + "hive.metastore.warehouse.external.dir", ports_to_test=self.TEST_PORTS_WITHOUT_SS) + responses = self.get_and_check_status(self.HADOOP_VARZ_URL, + "fs.defaultFS", ports_to_test=self.TEST_PORTS_WITHOUT_SS) + # check if response size is 2, for both catalog and impalad webUI + assert len(responses) == 2 class TestWebPageAndCloseSession(ImpalaTestSuite): ROOT_URL = "http://localhost:{0}/"
