This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit f5a8771a982cb270bbd7bce8e8253198c35c5ab4 Author: Yida Wu <[email protected]> AuthorDate: Wed Oct 16 19:33:59 2024 -0700 IMPALA-13411: Fix DCHECK fires for scan nodes that produce zero-length tuples Removed the requirement that tuple_data_len be greater than zero: the DCHECK assertion in tuple-file-writer.cc and the corresponding runtime validity check in tuple-file-reader.cc. In certain cases, such as count(*), tuple_data_len can legitimately be zero, because no column data is returned and only the row count matters. Tests: Added TestTupleCacheCountStar as a regression test. Change-Id: I264b537f0eb678b65081e90c21726198f254513d Reviewed-on: http://gerrit.cloudera.org:8080/21953 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/exec/tuple-file-reader.cc | 3 ++- be/src/exec/tuple-file-writer.cc | 2 +- tests/custom_cluster/test_tuple_cache.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/be/src/exec/tuple-file-reader.cc b/be/src/exec/tuple-file-reader.cc index 02dd7f31c..04af197a4 100644 --- a/be/src/exec/tuple-file-reader.cc +++ b/be/src/exec/tuple-file-reader.cc @@ -87,7 +87,8 @@ Status TupleFileReader::GetNext(RuntimeState *state, kudu::ArrayView<kudu::Slice>(chunk_lens_slices)), "Failed to read cache file"); - if (header_len == 0 || tuple_data_len == 0 || tuple_offsets_len == 0) { + // tuple_data_len can be zero, see IMPALA-13411. 
+ if (header_len == 0 || tuple_offsets_len == 0) { string err_msg = Substitute("Invalid data lengths at offset $0 in $1: " "header_len=$2, tuple_data_len=$3, tuple_offsets_len=$4", offset_, path_, header_len, tuple_data_len, tuple_offsets_len); diff --git a/be/src/exec/tuple-file-writer.cc b/be/src/exec/tuple-file-writer.cc index e68d0ae30..2a7d668fd 100644 --- a/be/src/exec/tuple-file-writer.cc +++ b/be/src/exec/tuple-file-writer.cc @@ -100,9 +100,9 @@ Status TupleFileWriter::Write(RuntimeState* state, RowBatch* row_batch) { DCHECK_GT(header_len, 0); kudu::Slice tuple_data = out.TupleDataAsSlice(); kudu::Slice tuple_offsets = out.TupleOffsetsAsSlice(); + // tuple_data_len is possible to be 0, see IMPALA-13411. size_t tuple_data_len = tuple_data.size(); size_t tuple_offsets_len = tuple_offsets.size(); - DCHECK_GT(tuple_data_len, 0); DCHECK_GT(tuple_offsets_len, 0); // We write things in this order (sizes first, then the variable-sized data): diff --git a/tests/custom_cluster/test_tuple_cache.py b/tests/custom_cluster/test_tuple_cache.py index 6c86726fb..54ce970d9 100644 --- a/tests/custom_cluster/test_tuple_cache.py +++ b/tests/custom_cluster/test_tuple_cache.py @@ -675,3 +675,31 @@ class TestTupleCacheRuntimeKeys(TestTupleCacheBase): assert len(before_result_set) + 1 == len(after_insert_result_set) different_rows = before_result_set.symmetric_difference(after_insert_result_set) assert len(different_rows) == 1 + + +class TestTupleCacheCountStar(TestTupleCacheBase): + + @classmethod + def add_test_dimensions(cls): + super(TestTupleCacheCountStar, cls).add_test_dimensions() + add_exec_option_dimension(cls, 'mt_dop', [0, 2]) + + @CustomClusterTestSuite.with_args( + start_args=CACHE_START_ARGS, cluster_size=1) + @pytest.mark.execute_serially + def test_tuple_cache_count_star(self, vector, unique_database): + """ + This test is a regression test for IMPALA-13411 to see whether it hits + the DCHECK. 
+ """ + self.client.set_configuration(vector.get_value('exec_option')) + fq_table = "{0}.tuple_cache_count_star".format(unique_database) + + # Create a table. + self.create_table(fq_table, scale=1) + + # Run twice and see if it hits the DCHECK. + query = "select count(*) from {0}".format(fq_table) + result1 = self.execute_query(query) + result2 = self.execute_query(query) + assert result1.success and result2.success
