This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 9dc146de7752bb4f684203d34233deeb215f2bc6
Author: Saurabh Katiyal <[email protected]>
AuthorDate: Wed Nov 8 03:06:38 2023 +0530

    IMPALA-9086: Show Hive configurations in /hadoop-varz page
    
    It's a two part improvement:
    1. Updated /hadoop-varz static class implementation for coordinator webUI
       from: org.apache.hadoop.conf.Configuration
       to  : org.apache.hadoop.hive.conf.HiveConf (Inherits Configuration)
    
    2. Added similar implementation of /hadoop-varz for catalog webUI
    
    Now the /hadoop-varz page has a lot more information about the HMS client
    and configuration, including warehouse.dir
    
    Change-Id: I5af0eb68e71afeed64660d4d40584208ea503217
    Reviewed-on: http://gerrit.cloudera.org:8080/20682
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/catalog/catalog-server.cc                   | 29 ++++++++++++++++++++++
 be/src/catalog/catalog-server.h                    |  5 ++++
 be/src/catalog/catalog.cc                          |  5 ++++
 be/src/catalog/catalog.h                           |  3 +++
 be/src/service/impala-http-handler.cc              | 10 ++++++--
 .../java/org/apache/impala/service/JniCatalog.java | 24 ++++++++++++++++++
 .../org/apache/impala/service/JniFrontend.java     | 12 ++++++---
 tests/custom_cluster/test_web_pages.py             |  1 +
 tests/webserver/test_web_pages.py                  | 13 ++++++++++
 9 files changed, 96 insertions(+), 6 deletions(-)

diff --git a/be/src/catalog/catalog-server.cc b/be/src/catalog/catalog-server.cc
index 01617555e..3402959b0 100644
--- a/be/src/catalog/catalog-server.cc
+++ b/be/src/catalog/catalog-server.cc
@@ -213,6 +213,8 @@ const string TABLE_METRICS_TEMPLATE = "table_metrics.tmpl";
 const string EVENT_WEB_PAGE = "/events";
 const string EVENT_METRICS_TEMPLATE = "events.tmpl";
 const string CATALOG_SERVICE_HEALTH_WEB_PAGE = "/healthz";
+const string HADOOP_VARZ_TEMPLATE = "hadoop-varz.tmpl";
+const string HADOOP_VARZ_WEB_PAGE = "/hadoop-varz";
 
 const int REFRESH_METRICS_INTERVAL_MS = 1000;
 
@@ -532,6 +534,8 @@ void CatalogServer::RegisterWebpages(Webserver* webserver, 
bool metrics_only) {
   webserver->RegisterUrlCallback(CATALOG_OPERATIONS_WEB_PAGE, 
CATALOG_OPERATIONS_TEMPLATE,
       [this](const auto& args, auto* doc) { 
this->OperationUsageUrlCallback(args, doc); },
       true);
+  webserver->RegisterUrlCallback(HADOOP_VARZ_WEB_PAGE, HADOOP_VARZ_TEMPLATE,
+      [this](const auto& args, auto* doc) { this->HadoopVarzHandler(args, 
doc); }, true);
   RegisterLogLevelCallbacks(webserver, true);
 }
 
@@ -1171,4 +1175,29 @@ void CatalogServer::HealthzHandler(
   *response = HttpStatusCode::ServiceUnavailable;
 }
 
+void CatalogServer::HadoopVarzHandler(const Webserver::WebRequest& req,
+    Document* document) {
+  TGetAllHadoopConfigsResponse response;
+  Status status  = catalog_->GetAllHadoopConfigs(&response);
+  if (!status.ok()) {
+    LOG(ERROR) << "Error getting cluster configuration for hadoop-varz: "
+               << status.GetDetail();
+    Value error(status.GetDetail().c_str(), document->GetAllocator());
+    document->AddMember("error", error, document->GetAllocator());
+    return;
+  }
+
+  Value configs(kArrayType);
+  typedef map<string, string> ConfigMap;
+  for (const auto& config: response.configs) {
+    Value key(config.first.c_str(), document->GetAllocator());
+    Value value(config.second.c_str(), document->GetAllocator());
+    Value config_json(kObjectType);
+    config_json.AddMember("key", key, document->GetAllocator());
+    config_json.AddMember("value", value, document->GetAllocator());
+    configs.PushBack(config_json, document->GetAllocator());
+  }
+  document->AddMember("configs", configs, document->GetAllocator());
+}
+
 }
diff --git a/be/src/catalog/catalog-server.h b/be/src/catalog/catalog-server.h
index e8c4513db..5b3f527da 100644
--- a/be/src/catalog/catalog-server.h
+++ b/be/src/catalog/catalog-server.h
@@ -356,6 +356,11 @@ class CatalogServer {
   /// Raw callback to indicate whether the service is ready.
   void HealthzHandler(const Webserver::WebRequest& req, std::stringstream* 
data,
       HttpStatusCode* response);
+
+  /// Json callback for /hadoop-varz. Produces Json with a list, 'configs', of 
(key,
+  /// value) pairs, one for each Hadoop configuration value.
+  void HadoopVarzHandler(const Webserver::WebRequest& req,
+      rapidjson::Document* document);
 };
 
 }
diff --git a/be/src/catalog/catalog.cc b/be/src/catalog/catalog.cc
index 92b306b51..310424270 100644
--- a/be/src/catalog/catalog.cc
+++ b/be/src/catalog/catalog.cc
@@ -73,6 +73,7 @@ Catalog::Catalog() {
     {"refreshDataSources", "()V", &refresh_data_sources_},
     {"getNullPartitionName", "()[B", &get_null_partition_name_id_},
     {"getLatestCompactions", "([B)[B", &get_latest_compactions_id_},
+    {"getAllHadoopConfigs", "()[B", &get_hadoop_configs_id_},
   };
 
   JNIEnv* jni_env = JniUtil::GetJNIEnv();
@@ -213,6 +214,10 @@ Status Catalog::UpdateTableUsage(const 
TUpdateTableUsageRequest& req) {
   return JniUtil::CallJniMethod(catalog_, update_table_usage_id_, req);
 }
 
+Status Catalog::GetAllHadoopConfigs(TGetAllHadoopConfigsResponse* result) {
+  return JniUtil::CallJniMethod(catalog_, get_hadoop_configs_id_, result);
+}
+
 void Catalog::RegenerateServiceId() {
   JNIEnv* jni_env = JniUtil::GetJNIEnv();
   jni_env->CallVoidMethod(catalog_, regenerate_service_id_);
diff --git a/be/src/catalog/catalog.h b/be/src/catalog/catalog.h
index f102a2908..489a84b45 100644
--- a/be/src/catalog/catalog.h
+++ b/be/src/catalog/catalog.h
@@ -152,6 +152,8 @@ class Catalog {
   /// Returns OK if the refreshing was successful, otherwise a Status object 
with
   /// information on the error will be returned.
   Status RefreshDataSources();
+  /// Returns all Hadoop configurations in key, value form in result.
+  Status GetAllHadoopConfigs(TGetAllHadoopConfigsResponse* result);
 
  private:
   jobject catalog_;  // instance of org.apache.impala.service.JniCatalog
@@ -179,6 +181,7 @@ class Catalog {
   jmethodID refresh_data_sources_; // JniCatalog.refreshDataSources()
   jmethodID get_null_partition_name_id_; // JniCatalog.getNullPartitionName()
   jmethodID get_latest_compactions_id_; // JniCatalog.getLatestCompactions()
+  jmethodID get_hadoop_configs_id_;  // JniCatalog.getAllHadoopConfigs()
 };
 
 }
diff --git a/be/src/service/impala-http-handler.cc 
b/be/src/service/impala-http-handler.cc
index 79de37006..04579866d 100644
--- a/be/src/service/impala-http-handler.cc
+++ b/be/src/service/impala-http-handler.cc
@@ -230,11 +230,17 @@ void ImpalaHttpHandler::HadoopVarzHandler(const 
Webserver::WebRequest& req,
     Document* document) {
   TGetAllHadoopConfigsResponse response;
   Status status  = 
server_->exec_env_->frontend()->GetAllHadoopConfigs(&response);
-  if (!status.ok()) return;
+  if (!status.ok()) {
+    LOG(ERROR) << "Error getting cluster configuration for hadoop-varz: "
+               << status.GetDetail();
+    Value error(status.GetDetail().c_str(), document->GetAllocator());
+    document->AddMember("error", error, document->GetAllocator());
+    return;
+  }
 
   Value configs(kArrayType);
   typedef map<string, string> ConfigMap;
-  for (const ConfigMap::value_type& config: response.configs) {
+  for (const auto& config: response.configs) {
     Value key(config.first.c_str(), document->GetAllocator());
     Value value(config.second.c_str(), document->GetAllocator());
     Value config_json(kObjectType);
diff --git a/fe/src/main/java/org/apache/impala/service/JniCatalog.java 
b/fe/src/main/java/org/apache/impala/service/JniCatalog.java
index c25faabaa..21fc2a63a 100644
--- a/fe/src/main/java/org/apache/impala/service/JniCatalog.java
+++ b/fe/src/main/java/org/apache/impala/service/JniCatalog.java
@@ -21,12 +21,15 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import java.util.Map;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.UUID;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.stream.Collectors;
 import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.impala.analysis.TableName;
 import org.apache.impala.authorization.AuthorizationConfig;
 import org.apache.impala.authorization.AuthorizationFactory;
@@ -85,6 +88,7 @@ import org.apache.impala.thrift.TTableUsage;
 import org.apache.impala.thrift.TUniqueId;
 import org.apache.impala.thrift.TUpdateCatalogRequest;
 import org.apache.impala.thrift.TUpdateTableUsageRequest;
+import org.apache.impala.thrift.TGetAllHadoopConfigsResponse;
 import org.apache.impala.util.AuthorizationUtil;
 import org.apache.impala.util.CatalogOpUtil;
 import org.apache.impala.util.GlogAppender;
@@ -126,6 +130,8 @@ public class JniCatalog {
     return new TUniqueId(uuid.getMostSignificantBits(), 
uuid.getLeastSignificantBits());
   }
 
+  private static final HiveConf HIVE_CONF = new HiveConf();
+
   public JniCatalog(byte[] thriftBackendConfig) throws ImpalaException {
     TBackendGflags cfg = new TBackendGflags();
     JniUtil.deserializeThrift(protocolFactory_, cfg, thriftBackendConfig);
@@ -594,4 +600,22 @@ public class JniCatalog {
       return response;
     });
   }
+
+  /**
+   * Returns the serialized byte array of TGetAllHadoopConfigsResponse
+   */
+  public byte[] getAllHadoopConfigs() throws ImpalaException {
+    Map<String, String> configs = Maps.newHashMap();
+    for (Map.Entry<String, String> e: HIVE_CONF) {
+      configs.put(e.getKey(), e.getValue());
+    }
+    TGetAllHadoopConfigsResponse result = new TGetAllHadoopConfigsResponse();
+    result.setConfigs(configs);
+    try {
+      TSerializer serializer = new TSerializer(protocolFactory_);
+      return serializer.serialize(result);
+    } catch (TException e) {
+      throw new InternalException(e.getMessage());
+    }
+  }
 }
diff --git a/fe/src/main/java/org/apache/impala/service/JniFrontend.java 
b/fe/src/main/java/org/apache/impala/service/JniFrontend.java
index 4c6d05f7c..77221e52b 100644
--- a/fe/src/main/java/org/apache/impala/service/JniFrontend.java
+++ b/fe/src/main/java/org/apache/impala/service/JniFrontend.java
@@ -22,6 +22,7 @@ import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FileSystem;
@@ -617,17 +618,20 @@ public class JniFrontend {
     return frontend_.getCatalog().getTable(tableName.db_name, 
tableName.table_name);
   }
 
-  // Caching this saves ~50ms per call to getHadoopConfigAsHtml
+  // Caching this saves ~50ms per call to getHadoopConfig
   private static final Configuration CONF = new Configuration();
   private static final Groups GROUPS = 
Groups.getUserToGroupsMappingService(CONF);
 
+  // Caching this saves ~50ms per call to getAllHadoopConfigs
+  // org.apache.hadoop.hive.conf.HiveConf inherits 
org.apache.hadoop.conf.Configuration
+  private static final HiveConf HIVE_CONF = new HiveConf();
+
   /**
-   * Returns a string of all loaded Hadoop configuration parameters as a table 
of keys
-   * and values. If asText is true, output in raw text. Otherwise, output in 
html.
+   * Returns the serialized byte array of TGetAllHadoopConfigsResponse
    */
   public byte[] getAllHadoopConfigs() throws ImpalaException {
     Map<String, String> configs = Maps.newHashMap();
-    for (Map.Entry<String, String> e: CONF) {
+    for (Map.Entry<String, String> e: HIVE_CONF) {
       configs.put(e.getKey(), e.getValue());
     }
     TGetAllHadoopConfigsResponse result = new TGetAllHadoopConfigsResponse();
diff --git a/tests/custom_cluster/test_web_pages.py 
b/tests/custom_cluster/test_web_pages.py
index 2bd40764a..a8e810d97 100644
--- a/tests/custom_cluster/test_web_pages.py
+++ b/tests/custom_cluster/test_web_pages.py
@@ -227,6 +227,7 @@ class TestWebPage(CustomClusterTestSuite):
       expected_catalog_links = [
         "/",
         "/catalog",
+        "/hadoop-varz",
         "/events",
         "/jmx",
         "/log_level",
diff --git a/tests/webserver/test_web_pages.py 
b/tests/webserver/test_web_pages.py
index 35f69519e..9757b0d72 100644
--- a/tests/webserver/test_web_pages.py
+++ b/tests/webserver/test_web_pages.py
@@ -59,6 +59,7 @@ class TestWebPage(ImpalaTestSuite):
   QUERIES_URL = "http://localhost:{0}/queries";
   HEALTHZ_URL = "http://localhost:{0}/healthz";
   EVENT_PROCESSOR_URL = "http://localhost:{0}/events";
+  HADOOP_VARZ_URL = "http://localhost:{0}/hadoop-varz";
 
   # log4j changes do not apply to the statestore since it doesn't
   # have an embedded JVM. So we make two sets of ports to test the
@@ -1110,6 +1111,18 @@ class TestWebPage(ImpalaTestSuite):
                         if q['query_id'] == query_id]
     assert len(expected_queries) == 1
 
+  @pytest.mark.execute_serially
+  def test_hadoop_varz_page(self):
+    """test for /hadoop-varz to check availability of hadoop configuration like
+    hive warehouse dir, fs.defaultFS"""
+    responses = self.get_and_check_status(self.HADOOP_VARZ_URL,
+        "hive.metastore.warehouse.dir", 
ports_to_test=self.TEST_PORTS_WITHOUT_SS)
+    responses = self.get_and_check_status(self.HADOOP_VARZ_URL,
+        "hive.metastore.warehouse.external.dir", 
ports_to_test=self.TEST_PORTS_WITHOUT_SS)
+    responses = self.get_and_check_status(self.HADOOP_VARZ_URL,
+        "fs.defaultFS", ports_to_test=self.TEST_PORTS_WITHOUT_SS)
+    # check that the response size is 2, one each for the catalog and impalad webUI
+    assert len(responses) == 2
 
 class TestWebPageAndCloseSession(ImpalaTestSuite):
   ROOT_URL = "http://localhost:{0}/";

Reply via email to