(impala) 01/02: IMPALA-12864: Deflake test_query_log_size_in_bytes.

boroknagyz Tue, 05 Mar 2024 02:02:59 -0800

This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


commit ff5cbadec4885c09f69f37574d4f4c08f62744c1
Author: Riza Suminto <[email protected]>
AuthorDate: Fri Mar 1 13:27:34 2024 -0800

    IMPALA-12864: Deflake test_query_log_size_in_bytes.
    
    test_query_log_size_in_bytes is flaky in exhaustive builds. The expected
    QueryStateRecord is off by around 100KB per query, indicating that the
    test query might be too complex and lead to variability in final query
    profile that is being archived.
    
    This patch simplifies the test query and relaxes the assertion. This
    patch also adds QUERY_LOG_EST_TOTAL_BYTES metric (key name
    "impala-server.query-log-est-total-bytes") that is validated as well in
    test_query_log_size_in_bytes. query-state-record-test.cc is modified a
    bit to resolve segmentation fault issue when building
    unified-be-test-validated-executable target run_clang_tidy.sh.
    
    Testing:
    - Pass test_query_log_size_in_bytes in exhaustive build.
    
    Change-Id: Ic9b543ffa931a01c11d58e45774ea46af78b3a25
    Reviewed-on: http://gerrit.cloudera.org:8080/21100
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/service/CMakeLists.txt             |  2 +-
 be/src/service/impala-server.cc           |  9 ++++----
 be/src/service/impala-server.h            |  6 ++----
 be/src/service/query-state-record-test.cc |  6 ++----
 be/src/service/query-state-record.h       |  2 +-
 be/src/util/impalad-metrics.cc            |  5 +++++
 be/src/util/impalad-metrics.h             |  5 +++++
 bin/run_clang_tidy.sh                     |  4 +++-
 common/thrift/metrics.json                | 10 +++++++++
 tests/custom_cluster/test_web_pages.py    | 35 ++++++++++++++++++-------------
 10 files changed, 55 insertions(+), 29 deletions(-)

diff --git a/be/src/service/CMakeLists.txt b/be/src/service/CMakeLists.txt
index c5fc31226..7941c2e73 100644
--- a/be/src/service/CMakeLists.txt
+++ b/be/src/service/CMakeLists.txt
@@ -142,5 +142,5 @@ ADD_BE_TEST(session-expiry-test session-expiry-test.cc) # 
TODO: this leaks thrif
 ADD_UNIFIED_BE_LSAN_TEST(hs2-util-test 
"StitchNullsTest.*:PrintTColumnValueTest.*")
 ADD_UNIFIED_BE_LSAN_TEST(query-options-test QueryOptions.*)
 ADD_UNIFIED_BE_LSAN_TEST(impala-server-test ImpalaServerTest.*)
-ADD_UNIFIED_BE_LSAN_TEST(query-state-record-test 
QueryStateRecordTest.*:PerHostStateTest.*)
+ADD_UNIFIED_BE_LSAN_TEST(query-state-record-test QueryStateRecordTest.*)
 ADD_BE_LSAN_TEST(internal-server-test)
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index 57880d2d6..ef3c29f36 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -1164,7 +1164,7 @@ void ImpalaServer::ArchiveQuery(const QueryHandle& 
query_handle) {
   {
     lock_guard<mutex> l(query_log_lock_);
     // Add record to the beginning of the log, and to the lookup index.
-    query_log_est_total_size_ += record_size;
+    ImpaladMetrics::QUERY_LOG_EST_TOTAL_BYTES->Increment(record_size);
     query_log_est_sizes_.push_front(record_size);
     query_log_.push_front(move(record));
     query_log_index_[query_handle->query_id()] = &query_log_.front();
@@ -1172,12 +1172,13 @@ void ImpalaServer::ArchiveQuery(const QueryHandle& 
query_handle) {
     while (!query_log_.empty()
         && ((FLAGS_query_log_size > -1 && FLAGS_query_log_size < 
query_log_.size())
             || (FLAGS_query_log_size_in_bytes > -1
-                && FLAGS_query_log_size_in_bytes < 
query_log_est_total_size_))) {
+                && FLAGS_query_log_size_in_bytes
+                    < ImpaladMetrics::QUERY_LOG_EST_TOTAL_BYTES->GetValue()))) 
{
       query_log_index_.erase(query_log_.back()->id);
-      query_log_est_total_size_ -= query_log_est_sizes_.back();
+      
ImpaladMetrics::QUERY_LOG_EST_TOTAL_BYTES->Increment(-query_log_est_sizes_.back());
       query_log_est_sizes_.pop_back();
       query_log_.pop_back();
-      DCHECK_GE(query_log_est_total_size_, 0);
+      DCHECK_GE(ImpaladMetrics::QUERY_LOG_EST_TOTAL_BYTES->GetValue(), 0);
       DCHECK_EQ(query_log_.size(), query_log_est_sizes_.size());
     }
   }
diff --git a/be/src/service/impala-server.h b/be/src/service/impala-server.h
index c456d803b..7e0d6b96d 100644
--- a/be/src/service/impala-server.h
+++ b/be/src/service/impala-server.h
@@ -1133,8 +1133,7 @@ class ImpalaServer : public ImpalaServiceIf,
   /// Random number generator for use in this class, thread safe.
   static ThreadSafeRandom rng_;
 
-  /// Guards query_log_, query_log_index_, query_log_est_sizes_,
-  /// and query_log_est_total_size_.
+  /// Guards query_log_, query_log_index_, and query_log_est_sizes_.
   std::mutex query_log_lock_;
 
   /// FIFO list of query records, which are written after the query finishes 
executing.
@@ -1153,9 +1152,8 @@ class ImpalaServer : public ImpalaServiceIf,
       QueryLogIndex;
   QueryLogIndex query_log_index_;
 
-  /// Estimated individual and total size of records in query_log_ in bytes.
+  /// Estimated individual size of records in query_log_ in bytes.
   std::list<int64_t> query_log_est_sizes_;
-  int64_t query_log_est_total_size_ = 0;
 
   /// Sets the given query_record (and retried_query_record too if given) for 
the given
   /// query_id. Returns an error Status if the given query_id cannot be found 
in the
diff --git a/be/src/service/query-state-record-test.cc 
b/be/src/service/query-state-record-test.cc
index 089462d9b..49bca460f 100644
--- a/be/src/service/query-state-record-test.cc
+++ b/be/src/service/query-state-record-test.cc
@@ -125,7 +125,7 @@ TEST(QueryStateRecordTest, EventsTimelineIterator) {
   }
 }
 
-TEST(PerHostStateTest, PeakMemoryComparatorLessThan) {
+TEST(QueryStateRecordTest, PerHostStatePeakMemoryComparatorLessThan) {
   TNetworkAddress addr_a;
   PerHostState a;
   a.peak_memory_usage = std::numeric_limits<int64_t>::min();
@@ -140,15 +140,13 @@ TEST(PerHostStateTest, PeakMemoryComparatorLessThan) {
   EXPECT_FALSE(PerHostPeakMemoryComparator(pair_b, pair_a));
 }
 
-TEST(PerHostStateTest, PeakMemoryComparatorEqual) {
+TEST(QueryStateRecordTest, PerHostStatePeakMemoryComparatorEqual) {
   TNetworkAddress addr_a;
   PerHostState a;
-  a.peak_memory_usage = 0;
   std::pair<TNetworkAddress, PerHostState> pair_a = std::make_pair(addr_a, a);
 
   TNetworkAddress addr_b;
   PerHostState b;
-  b.peak_memory_usage = 0;
   std::pair<TNetworkAddress, PerHostState> pair_b = std::make_pair(addr_b, b);
 
   EXPECT_FALSE(PerHostPeakMemoryComparator(pair_a, pair_b));
diff --git a/be/src/service/query-state-record.h 
b/be/src/service/query-state-record.h
index 957c7dff5..e30714ed7 100644
--- a/be/src/service/query-state-record.h
+++ b/be/src/service/query-state-record.h
@@ -178,7 +178,7 @@ struct PerHostState {
   int32_t fragment_instance_count = 0;
 
   // Peak Memory Usage
-  int64_t peak_memory_usage;
+  int64_t peak_memory_usage = 0;
 }; // struct PerHostState
 
 /// Comparator function that compares two PerHostState structs based on the
diff --git a/be/src/util/impalad-metrics.cc b/be/src/util/impalad-metrics.cc
index 697864087..2ec4814bd 100644
--- a/be/src/util/impalad-metrics.cc
+++ b/be/src/util/impalad-metrics.cc
@@ -167,6 +167,8 @@ const char* ImpaladMetricKeys::HEDGED_READ_OPS =
 const char* ImpaladMetricKeys::HEDGED_READ_OPS_WIN =
     "impala-server.hedged-read-ops-win";
 const char* ImpaladMetricKeys::DEBUG_ACTION_NUM_FAIL = 
"impala.debug_action.fail";
+const char* ImpaladMetricKeys::QUERY_LOG_EST_TOTAL_BYTES =
+    "impala-server.query-log-est-total-bytes";
 
 // These are created by impala-server during startup.
 // =======
@@ -237,6 +239,7 @@ IntGauge* ImpaladMetrics::NUM_FILES_OPEN_FOR_INSERT = 
nullptr;
 IntGauge* ImpaladMetrics::NUM_QUERIES_REGISTERED = nullptr;
 IntGauge* ImpaladMetrics::RESULTSET_CACHE_TOTAL_NUM_ROWS = nullptr;
 IntGauge* ImpaladMetrics::RESULTSET_CACHE_TOTAL_BYTES = nullptr;
+IntGauge* ImpaladMetrics::QUERY_LOG_EST_TOTAL_BYTES = nullptr;
 DoubleGauge* ImpaladMetrics::CATALOG_CACHE_AVG_LOAD_TIME = nullptr;
 DoubleGauge* ImpaladMetrics::CATALOG_CACHE_HIT_RATE = nullptr;
 DoubleGauge* ImpaladMetrics::CATALOG_CACHE_LOAD_EXCEPTION_RATE = nullptr;
@@ -342,6 +345,8 @@ void ImpaladMetrics::CreateMetrics(MetricGroup* m) {
       ImpaladMetricKeys::RESULTSET_CACHE_TOTAL_NUM_ROWS, 0);
   RESULTSET_CACHE_TOTAL_BYTES = m->AddGauge(
       ImpaladMetricKeys::RESULTSET_CACHE_TOTAL_BYTES, 0);
+  QUERY_LOG_EST_TOTAL_BYTES =
+      m->AddGauge(ImpaladMetricKeys::QUERY_LOG_EST_TOTAL_BYTES, 0);
 
   // Initialize scan node metrics
   NUM_RANGES_PROCESSED = m->AddCounter(
diff --git a/be/src/util/impalad-metrics.h b/be/src/util/impalad-metrics.h
index d206f916c..7170d752d 100644
--- a/be/src/util/impalad-metrics.h
+++ b/be/src/util/impalad-metrics.h
@@ -261,6 +261,10 @@ class ImpaladMetricKeys {
   /// --debug_actions is set.
   static const char* DEBUG_ACTION_NUM_FAIL;
 
+  // Estimated total size of query logs that are currently retained in memory.
+  // Associated metric is modified in ImpalaServer::ArchiveQuery and must hold
+  // ImpalaServer::query_log_lock_ on modification.
+  static const char* QUERY_LOG_EST_TOTAL_BYTES;
 };
 
 /// Global impalad-wide metrics.  This is useful for objects that want to 
update metrics
@@ -344,6 +348,7 @@ class ImpaladMetrics {
   static IntGauge* NUM_QUERIES_REGISTERED;
   static IntGauge* RESULTSET_CACHE_TOTAL_NUM_ROWS;
   static IntGauge* RESULTSET_CACHE_TOTAL_BYTES;
+  static IntGauge* QUERY_LOG_EST_TOTAL_BYTES;
 
   // Properties
   static BooleanProperty* CATALOG_READY;
diff --git a/bin/run_clang_tidy.sh b/bin/run_clang_tidy.sh
index 7ca1d58da..ecc295640 100755
--- a/bin/run_clang_tidy.sh
+++ b/bin/run_clang_tidy.sh
@@ -33,7 +33,9 @@ TMP_BUILDALL_LOG=$(mktemp)
 echo "Compiling, for build logs see ${TMP_BUILDALL_LOG}"
 if ! ./buildall.sh -skiptests -tidy -so -noclean &> "${TMP_BUILDALL_LOG}"
 then
-  echo "buildall.sh failed, dumping output" >&2
+  echo "buildall.sh failed!" >&2
+  grep "^make.* Error " ${TMP_BUILDALL_LOG} >&2
+  echo "Dumping output of ./buildall.sh -skiptests -tidy -so -noclean" >&2
   cat "${TMP_BUILDALL_LOG}"
   exit 1
 fi
diff --git a/common/thrift/metrics.json b/common/thrift/metrics.json
index 78622bb90..0180b6ff9 100644
--- a/common/thrift/metrics.json
+++ b/common/thrift/metrics.json
@@ -2853,6 +2853,16 @@
     "kind": "HISTOGRAM",
     "key": "impala-server.query-durations-ms"
   },
+  {
+    "description": "Estimated total size of query logs that are currently 
retained in memory.",
+    "contexts": [
+        "IMPALAD"
+    ],
+    "label": "Query log size in memory.",
+    "units": "BYTES",
+    "kind": "GAUGE",
+    "key": "impala-server.query-log-est-total-bytes"
+  },
   {
     "description": "Distribution of DDL operation latencies",
     "contexts": [
diff --git a/tests/custom_cluster/test_web_pages.py 
b/tests/custom_cluster/test_web_pages.py
index f6f900bdb..9e521226e 100644
--- a/tests/custom_cluster/test_web_pages.py
+++ b/tests/custom_cluster/test_web_pages.py
@@ -30,6 +30,8 @@ from tests.common.custom_cluster_test_suite import (
   CustomClusterTestSuite)
 from tests.shell.util import run_impala_shell_cmd
 
+SMALL_QUERY_LOG_SIZE_IN_BYTES = 40 * 1024
+
 
 class TestWebPage(CustomClusterTestSuite):
   @classmethod
@@ -149,23 +151,28 @@ class TestWebPage(CustomClusterTestSuite):
 
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
-      impalad_args="--query_log_size_in_bytes=" + str(2 * 1024 * 1024)
+      cluster_size=1,
+      impalad_args="--query_log_size_in_bytes=" + 
str(SMALL_QUERY_LOG_SIZE_IN_BYTES)
   )
   def test_query_log_size_in_bytes(self):
     """Check if query list is limited by query_log_size_in_bytes flag."""
-    # The input query will produce ~627228 bytes of QueryStateRecord in 
MT_DOP=8.
-    query = ("with l1 as (select id id1 from functional.alltypes), "
-        "l2 as (select a1.id1 id2 from l1 as a1 join l1 as b1 on 
a1.id1=-b1.id1), "
-        "l3 as (select a2.id2 id3 from l2 as a2 join l2 as b2 on 
a2.id2=-b2.id2), "
-        "l4 as (select a3.id3 id4 from l3 as a3 join l3 as b3 on 
a3.id3=-b3.id3), "
-        "l5 as (select a4.id4 id5 from l4 as a4 join l4 as b4 on 
a4.id4=-b4.id4) "
-        "select * from l5 limit 100;")
-    self.execute_query("SET MT_DOP=8;")
-    for i in range(0, 5):
-      self.execute_query(query)
-    response = requests.get("http://localhost:25000/queries?json";)
-    queries_json = json.loads(response.text)
-    assert len(queries_json["completed_queries"]) == 3
+    # This simple test query will produce ~8520 bytes of QueryStateRecord.
+    query = "select version()"
+    num_queries = 10
+    for i in range(0, num_queries):
+      self.execute_query_expect_success(self.client, query)
+
+    # Retrieve and verify the total size metrics.
+    metric_key = "impala-server.query-log-est-total-bytes"
+    metric_value = 
self.cluster.impalads[0].service.get_metric_value(metric_key)
+    assert metric_value > 0
+    assert metric_value <= SMALL_QUERY_LOG_SIZE_IN_BYTES
+
+    # Verify that the query page only contains a subset of the test queries.
+    queries_response = requests.get("http://localhost:25000/queries?json";)
+    queries_json = json.loads(queries_response.text)
+    assert len(queries_json["completed_queries"]) > 0
+    assert len(queries_json["completed_queries"]) < num_queries
 
   # Checks if 'messages' exists/does not exist in 'result_stderr' based on the 
value of
   # 'should_exist'

(impala) 01/02: IMPALA-12864: Deflake test_query_log_size_in_bytes.

Reply via email to