This is an automated email from the ASF dual-hosted git repository. boroknagyz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit ff5cbadec4885c09f69f37574d4f4c08f62744c1 Author: Riza Suminto <[email protected]> AuthorDate: Fri Mar 1 13:27:34 2024 -0800 IMPALA-12864: Deflake test_query_log_size_in_bytes. test_query_log_size_in_bytes is flaky in exhaustive builds. The expected QueryStateRecord is off by around 100KB per query, indicating that the test query might be too complex and lead to variability in final query profile that is being archived. This patch simplifies the test query and relaxes the assertion. This patch also adds QUERY_LOG_EST_TOTAL_BYTES metric (key name "impala-server.query-log-est-total-bytes") that is validated as well in test_query_log_size_in_bytes. query-state-record-test.cc is modified a bit to resolve segmentation fault issue when building unified-be-test-validated-executable target run_clang_tidy.sh. Testing: - Pass test_query_log_size_in_bytes in exhaustive build. Change-Id: Ic9b543ffa931a01c11d58e45774ea46af78b3a25 Reviewed-on: http://gerrit.cloudera.org:8080/21100 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/service/CMakeLists.txt | 2 +- be/src/service/impala-server.cc | 9 ++++---- be/src/service/impala-server.h | 6 ++---- be/src/service/query-state-record-test.cc | 6 ++---- be/src/service/query-state-record.h | 2 +- be/src/util/impalad-metrics.cc | 5 +++++ be/src/util/impalad-metrics.h | 5 +++++ bin/run_clang_tidy.sh | 4 +++- common/thrift/metrics.json | 10 +++++++++ tests/custom_cluster/test_web_pages.py | 35 ++++++++++++++++++------------- 10 files changed, 55 insertions(+), 29 deletions(-) diff --git a/be/src/service/CMakeLists.txt b/be/src/service/CMakeLists.txt index c5fc31226..7941c2e73 100644 --- a/be/src/service/CMakeLists.txt +++ b/be/src/service/CMakeLists.txt @@ -142,5 +142,5 @@ ADD_BE_TEST(session-expiry-test session-expiry-test.cc) # TODO: this leaks thrif ADD_UNIFIED_BE_LSAN_TEST(hs2-util-test "StitchNullsTest.*:PrintTColumnValueTest.*") ADD_UNIFIED_BE_LSAN_TEST(query-options-test QueryOptions.*) ADD_UNIFIED_BE_LSAN_TEST(impala-server-test ImpalaServerTest.*) -ADD_UNIFIED_BE_LSAN_TEST(query-state-record-test QueryStateRecordTest.*:PerHostStateTest.*) +ADD_UNIFIED_BE_LSAN_TEST(query-state-record-test QueryStateRecordTest.*) ADD_BE_LSAN_TEST(internal-server-test) diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc index 57880d2d6..ef3c29f36 100644 --- a/be/src/service/impala-server.cc +++ b/be/src/service/impala-server.cc @@ -1164,7 +1164,7 @@ void ImpalaServer::ArchiveQuery(const QueryHandle& query_handle) { { lock_guard<mutex> l(query_log_lock_); // Add record to the beginning of the log, and to the lookup index. - query_log_est_total_size_ += record_size; + ImpaladMetrics::QUERY_LOG_EST_TOTAL_BYTES->Increment(record_size); query_log_est_sizes_.push_front(record_size); query_log_.push_front(move(record)); query_log_index_[query_handle->query_id()] = &query_log_.front(); @@ -1172,12 +1172,13 @@ void ImpalaServer::ArchiveQuery(const QueryHandle& query_handle) { while (!query_log_.empty() && ((FLAGS_query_log_size > -1 && FLAGS_query_log_size < query_log_.size()) || (FLAGS_query_log_size_in_bytes > -1 - && FLAGS_query_log_size_in_bytes < query_log_est_total_size_))) { + && FLAGS_query_log_size_in_bytes + < ImpaladMetrics::QUERY_LOG_EST_TOTAL_BYTES->GetValue()))) { query_log_index_.erase(query_log_.back()->id); - query_log_est_total_size_ -= query_log_est_sizes_.back(); + ImpaladMetrics::QUERY_LOG_EST_TOTAL_BYTES->Increment(-query_log_est_sizes_.back()); query_log_est_sizes_.pop_back(); query_log_.pop_back(); - DCHECK_GE(query_log_est_total_size_, 0); + DCHECK_GE(ImpaladMetrics::QUERY_LOG_EST_TOTAL_BYTES->GetValue(), 0); DCHECK_EQ(query_log_.size(), query_log_est_sizes_.size()); } } diff --git a/be/src/service/impala-server.h b/be/src/service/impala-server.h index c456d803b..7e0d6b96d 100644 --- a/be/src/service/impala-server.h +++ b/be/src/service/impala-server.h @@ -1133,8 +1133,7 @@ class ImpalaServer : public ImpalaServiceIf, /// Random number generator for use in this class, thread safe. static ThreadSafeRandom rng_; - /// Guards query_log_, query_log_index_, query_log_est_sizes_, - /// and query_log_est_total_size_. + /// Guards query_log_, query_log_index_, and query_log_est_sizes_. std::mutex query_log_lock_; /// FIFO list of query records, which are written after the query finishes executing. @@ -1153,9 +1152,8 @@ class ImpalaServer : public ImpalaServiceIf, QueryLogIndex; QueryLogIndex query_log_index_; - /// Estimated individual and total size of records in query_log_ in bytes. + /// Estimated individual size of records in query_log_ in bytes. std::list<int64_t> query_log_est_sizes_; - int64_t query_log_est_total_size_ = 0; /// Sets the given query_record (and retried_query_record too if given) for the given /// query_id. Returns an error Status if the given query_id cannot be found in the diff --git a/be/src/service/query-state-record-test.cc b/be/src/service/query-state-record-test.cc index 089462d9b..49bca460f 100644 --- a/be/src/service/query-state-record-test.cc +++ b/be/src/service/query-state-record-test.cc @@ -125,7 +125,7 @@ TEST(QueryStateRecordTest, EventsTimelineIterator) { } } -TEST(PerHostStateTest, PeakMemoryComparatorLessThan) { +TEST(QueryStateRecordTest, PerHostStatePeakMemoryComparatorLessThan) { TNetworkAddress addr_a; PerHostState a; a.peak_memory_usage = std::numeric_limits<int64_t>::min(); @@ -140,15 +140,13 @@ TEST(PerHostStateTest, PeakMemoryComparatorLessThan) { EXPECT_FALSE(PerHostPeakMemoryComparator(pair_b, pair_a)); } -TEST(PerHostStateTest, PeakMemoryComparatorEqual) { +TEST(QueryStateRecordTest, PerHostStatePeakMemoryComparatorEqual) { TNetworkAddress addr_a; PerHostState a; - a.peak_memory_usage = 0; std::pair<TNetworkAddress, PerHostState> pair_a = std::make_pair(addr_a, a); TNetworkAddress addr_b; PerHostState b; - b.peak_memory_usage = 0; std::pair<TNetworkAddress, PerHostState> pair_b = std::make_pair(addr_b, b); EXPECT_FALSE(PerHostPeakMemoryComparator(pair_a, pair_b)); diff --git a/be/src/service/query-state-record.h b/be/src/service/query-state-record.h index 957c7dff5..e30714ed7 100644 --- a/be/src/service/query-state-record.h +++ b/be/src/service/query-state-record.h @@ -178,7 +178,7 @@ struct PerHostState { int32_t fragment_instance_count = 0; // Peak Memory Usage - int64_t peak_memory_usage; + int64_t peak_memory_usage = 0; }; // struct PerHostState /// Comparator function that compares two PerHostState structs based on the diff --git a/be/src/util/impalad-metrics.cc b/be/src/util/impalad-metrics.cc index 697864087..2ec4814bd 100644 --- a/be/src/util/impalad-metrics.cc +++ b/be/src/util/impalad-metrics.cc @@ -167,6 +167,8 @@ const char* ImpaladMetricKeys::HEDGED_READ_OPS = const char* ImpaladMetricKeys::HEDGED_READ_OPS_WIN = "impala-server.hedged-read-ops-win"; const char* ImpaladMetricKeys::DEBUG_ACTION_NUM_FAIL = "impala.debug_action.fail"; +const char* ImpaladMetricKeys::QUERY_LOG_EST_TOTAL_BYTES = + "impala-server.query-log-est-total-bytes"; // These are created by impala-server during startup. // ======= @@ -237,6 +239,7 @@ IntGauge* ImpaladMetrics::NUM_FILES_OPEN_FOR_INSERT = nullptr; IntGauge* ImpaladMetrics::NUM_QUERIES_REGISTERED = nullptr; IntGauge* ImpaladMetrics::RESULTSET_CACHE_TOTAL_NUM_ROWS = nullptr; IntGauge* ImpaladMetrics::RESULTSET_CACHE_TOTAL_BYTES = nullptr; +IntGauge* ImpaladMetrics::QUERY_LOG_EST_TOTAL_BYTES = nullptr; DoubleGauge* ImpaladMetrics::CATALOG_CACHE_AVG_LOAD_TIME = nullptr; DoubleGauge* ImpaladMetrics::CATALOG_CACHE_HIT_RATE = nullptr; DoubleGauge* ImpaladMetrics::CATALOG_CACHE_LOAD_EXCEPTION_RATE = nullptr; @@ -342,6 +345,8 @@ void ImpaladMetrics::CreateMetrics(MetricGroup* m) { ImpaladMetricKeys::RESULTSET_CACHE_TOTAL_NUM_ROWS, 0); RESULTSET_CACHE_TOTAL_BYTES = m->AddGauge( ImpaladMetricKeys::RESULTSET_CACHE_TOTAL_BYTES, 0); + QUERY_LOG_EST_TOTAL_BYTES = + m->AddGauge(ImpaladMetricKeys::QUERY_LOG_EST_TOTAL_BYTES, 0); // Initialize scan node metrics NUM_RANGES_PROCESSED = m->AddCounter( diff --git a/be/src/util/impalad-metrics.h b/be/src/util/impalad-metrics.h index d206f916c..7170d752d 100644 --- a/be/src/util/impalad-metrics.h +++ b/be/src/util/impalad-metrics.h @@ -261,6 +261,10 @@ class ImpaladMetricKeys { /// --debug_actions is set. static const char* DEBUG_ACTION_NUM_FAIL; + // Estimated total size of query logs that are currently retained in memory. + // Associated metric is modified in ImpalaServer::ArchiveQuery and must hold + // ImpalaServer::query_log_lock_ on modification. + static const char* QUERY_LOG_EST_TOTAL_BYTES; }; /// Global impalad-wide metrics. This is useful for objects that want to update metrics @@ -344,6 +348,7 @@ class ImpaladMetrics { static IntGauge* NUM_QUERIES_REGISTERED; static IntGauge* RESULTSET_CACHE_TOTAL_NUM_ROWS; static IntGauge* RESULTSET_CACHE_TOTAL_BYTES; + static IntGauge* QUERY_LOG_EST_TOTAL_BYTES; // Properties static BooleanProperty* CATALOG_READY; diff --git a/bin/run_clang_tidy.sh b/bin/run_clang_tidy.sh index 7ca1d58da..ecc295640 100755 --- a/bin/run_clang_tidy.sh +++ b/bin/run_clang_tidy.sh @@ -33,7 +33,9 @@ TMP_BUILDALL_LOG=$(mktemp) echo "Compiling, for build logs see ${TMP_BUILDALL_LOG}" if ! ./buildall.sh -skiptests -tidy -so -noclean &> "${TMP_BUILDALL_LOG}" then - echo "buildall.sh failed, dumping output" >&2 + echo "buildall.sh failed!" >&2 + grep "^make.* Error " ${TMP_BUILDALL_LOG} >&2 + echo "Dumping output of ./buildall.sh -skiptests -tidy -so -noclean" >&2 cat "${TMP_BUILDALL_LOG}" exit 1 fi diff --git a/common/thrift/metrics.json b/common/thrift/metrics.json index 78622bb90..0180b6ff9 100644 --- a/common/thrift/metrics.json +++ b/common/thrift/metrics.json @@ -2853,6 +2853,16 @@ "kind": "HISTOGRAM", "key": "impala-server.query-durations-ms" }, + { + "description": "Estimated total size of query logs that are currently retained in memory.", + "contexts": [ + "IMPALAD" + ], + "label": "Query log size in memory.", + "units": "BYTES", + "kind": "GAUGE", + "key": "impala-server.query-log-est-total-bytes" + }, { "description": "Distribution of DDL operation latencies", "contexts": [ diff --git a/tests/custom_cluster/test_web_pages.py b/tests/custom_cluster/test_web_pages.py index f6f900bdb..9e521226e 100644 --- a/tests/custom_cluster/test_web_pages.py +++ b/tests/custom_cluster/test_web_pages.py @@ -30,6 +30,8 @@ from tests.common.custom_cluster_test_suite import ( CustomClusterTestSuite) from tests.shell.util import run_impala_shell_cmd +SMALL_QUERY_LOG_SIZE_IN_BYTES = 40 * 1024 + class TestWebPage(CustomClusterTestSuite): @classmethod @@ -149,23 +151,28 @@ class TestWebPage(CustomClusterTestSuite): @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( - impalad_args="--query_log_size_in_bytes=" + str(2 * 1024 * 1024) + cluster_size=1, + impalad_args="--query_log_size_in_bytes=" + str(SMALL_QUERY_LOG_SIZE_IN_BYTES) ) def test_query_log_size_in_bytes(self): """Check if query list is limited by query_log_size_in_bytes flag.""" - # The input query will produce ~627228 bytes of QueryStateRecord in MT_DOP=8. - query = ("with l1 as (select id id1 from functional.alltypes), " - "l2 as (select a1.id1 id2 from l1 as a1 join l1 as b1 on a1.id1=-b1.id1), " - "l3 as (select a2.id2 id3 from l2 as a2 join l2 as b2 on a2.id2=-b2.id2), " - "l4 as (select a3.id3 id4 from l3 as a3 join l3 as b3 on a3.id3=-b3.id3), " - "l5 as (select a4.id4 id5 from l4 as a4 join l4 as b4 on a4.id4=-b4.id4) " - "select * from l5 limit 100;") - self.execute_query("SET MT_DOP=8;") - for i in range(0, 5): - self.execute_query(query) - response = requests.get("http://localhost:25000/queries?json") - queries_json = json.loads(response.text) - assert len(queries_json["completed_queries"]) == 3 + # This simple test query will produce ~8520 bytes of QueryStateRecord. + query = "select version()" + num_queries = 10 + for i in range(0, num_queries): + self.execute_query_expect_success(self.client, query) + + # Retrieve and verify the total size metrics. + metric_key = "impala-server.query-log-est-total-bytes" + metric_value = self.cluster.impalads[0].service.get_metric_value(metric_key) + assert metric_value > 0 + assert metric_value <= SMALL_QUERY_LOG_SIZE_IN_BYTES + + # Verify that the query page only contains a subset of the test queries. + queries_response = requests.get("http://localhost:25000/queries?json") + queries_json = json.loads(queries_response.text) + assert len(queries_json["completed_queries"]) > 0 + assert len(queries_json["completed_queries"]) < num_queries # Checks if 'messages' exists/does not exist in 'result_stderr' based on the value of # 'should_exist'
